The output ko_abundance.tsv file was open with Excel to create an Excel file (ko_totalabundance.xlsx) with all three necessary data groups to work with Phyloseq objects: - RPKM_df contains the abundance of each KO in the sample calculated in RPKM. This information is in the ‘RPKM’ sheet of the ‘ko_totalabundance.xlsx’ file. - KO_df contains the KO ids together with its description and symbol. This information is in the ‘KO’ sheet of the ‘ko_totalabundance.xlsx’ file. - samples_df contains the metadata of each sample. This information is in the ‘metadata’ sheet of the ‘ko_totalabundance.xlsx’ file.
#Loading the data
setwd("~/Desktop/RyC_TFM/final_results")
RPKM_df <- read_excel("ko_totalabundance.xlsx", sheet = "RPKM")
KO_df <- read_excel("ko_totalabundance.xlsx", sheet = "KO")
samples_df <- read_excel("ko_totalabundance.xlsx", sheet = "metadata")
# We want to add a new samples variable to merge the subjects' Group (Donor, Fecal Microbial Transplant (FMT) or Placebo) with the data collecting Weeks variables
samples_df$group_week <- paste (samples_df$Group, samples_df$week, sep = "_")
A Phyloseq object is created with the data stored in the previously loaded dataframes that are firstly transformed to matrices when needed. Additionally, abundances are normalized using the Median Scaling method. We will obtain the ‘raw_ps’ phyloseq object with the raw read count data.
# Define the row names from the KEGG_ko column in the RPKM_df
RPKM_df <- RPKM_df %>%
tibble::column_to_rownames("KEGG_ko")
RPKM_df[is.na(RPKM_df)] <- 0
# Idem for the other dataframes
KO_df <- KO_df %>%
tibble::column_to_rownames("KEGG_ko")
samples_df <- samples_df %>%
tibble::column_to_rownames("Sample_ID")
#Transform into matrices RPKM and KO tables (sample table can be left as data frame)
RPKM_mat <- as.matrix(RPKM_df)
KO_mat <- as.matrix(KO_df)
class(RPKM_mat) <- "numeric" # Because it contains the RPKM calculated for each KO. We are going to consider this values as normalized read counts for each KO
# Transform to Phyloseq objects.
# Since PhyloSeq is usually implemented for taxonomy and here we are working with functional annotations, some equivalencies are made:
RPKM = otu_table(RPKM_mat, taxa_are_rows = TRUE)
KO = tax_table(KO_mat)
samples = sample_data(samples_df)
# We want to have the raw phyloseq object which contains normalized read counts (RPKM) as integer numbers
raw_ps <- phyloseq(RPKM, KO, samples)
round_fun <- function(x) round(x)
raw_ps <- transform_sample_counts(raw_ps, round_fun)
Since PhyloSeq is usually implemented for taxonomy and here we are working with functional annotations, some equivalencies are made:
Taxa in this case is refering to the KOs identifiers. Each KO identifier has a Description, Symbol and again the KO (3 taxonomic ranks) which are stored in the Taxonomy table. The OTU Table corresponds then to the relative abundance of each KO (taxa) in RPKM units for each of the 117 samples that have 15 variables stored in the Sample Data section.
raw_ps # Phyloseq Object schema
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 7168 taxa and 117 samples ]
## sample_data() Sample Data: [ 117 samples by 15 sample variables ]
## tax_table() Taxonomy Table: [ 7168 taxa by 3 taxonomic ranks ]
ntaxa(raw_ps) # Number of KO
## [1] 7168
nsamples(raw_ps) # Number of samples
## [1] 117
sample_names(raw_ps) # All sample names
## [1] "R12W8" "R2W0" "R16W24" "543-46G1" "R30W7" "R2W1"
## [7] "R16W0" "R8W7" "R14W4" "R14W7" "R11W24" "R30W4"
## [13] "R20W24" "R24W1" "R20W8" "R14W0" "R18W24" "R4W8"
## [19] "R2W24X" "R12W24" "R24W7" "R14W1" "R23W24" "R29W24"
## [25] "R2W24Y" "R2W4" "R2W7" "0505-0101" "R30W1" "R30W0"
## [31] "R16W7" "R24W4" "R24W24" "R8W0" "R26W0" "R18W8"
## [37] "R10W7" "R20W0" "R14W24" "R4W7" "R18W4" "R24W8"
## [43] "R20W1" "R6W0" "R12W1" "R18W7" "R4W4" "R2W8"
## [49] "R12W0" "R19W24" "R21W24" "R6W7" "R30W8" "R18W0"
## [55] "R20W4" "R12W7" "R22W0" "R10W24" "R18W1" "R10W0"
## [61] "R4W1" "R12W4" "R4W0" "R14W8" "R17W24" "R20W6"
## [67] "R25W0" "R8W24" "R1W7" "R17W1" "R3W0" "R9W7"
## [73] "R1W5" "R29W8" "R17W0" "R7W8" "R30W24" "R23W8"
## [79] "R3W7" "R9W0" "R6W24" "R27W0" "R17W7" "R17W4"
## [85] "585-57G1" "R1W1" "R1W24" "R11W8" "R1W0" "R19W7"
## [91] "R7W0" "R17W8" "R29W0" "R23W7" "R11W4" "R29W1"
## [97] "R13W0" "R7W1" "R21W0" "R7W24" "R23W4" "R11W7"
## [103] "R29W5" "R21W7" "R4W24" "R1W8" "R11W0" "R7W4"
## [109] "R5W0" "R11W1" "R19W0" "R3W24" "R23W1" "R9W24"
## [115] "R7W7" "R29W7" "R23W0"
rank_names(raw_ps) # KO's characteristics
## [1] "Description" "Symbol" "KO"
sample_variables(raw_ps) # Sample variables
## [1] "week" "id"
## [3] "Group" "Donor"
## [5] "Age" "Sex"
## [7] "AIDS_events" "Nadir.CD4.T.cell"
## [9] "CD4..T.cells" "CD4.CD8.ratio"
## [11] "Antibiotics_.past_6_months" "Antibiotics_past_3_months"
## [13] "Completed.Follow.up" "Sample_id"
## [15] "group_week"
otu_table(raw_ps)[1:5, 1:5] # Overview of the relative abundance table of each KO in RPKM
## OTU Table: [5 taxa and 5 samples]
## taxa are rows
## R12W8 R2W0 R16W24 543-46G1 R30W7
## K07082 564 292 443 315 512
## K08303 1471 390 1361 994 1597
## K01872 1478 506 1188 801 1293
## K02503 446 366 374 333 384
## K02238 1138 375 909 595 1310
tax_table(raw_ps)[1:5, 1:2] # Overview of the the characteristics of each KO
## Taxonomy Table: [5 taxa by 2 taxonomic ranks]:
## Description Symbol
## K07082 "UPF0755 protein" "K07082"
## K08303 "U32 family peptidase [EC:3.4.-.-]" "prtC, trhP"
## K01872 "alanyl-tRNA synthetase [EC:6.1.1.7]" "AARS, alaS"
## K02503 "histidine triad (HIT) family protein" "HINT1, hinT, hit"
## K02238 "competence protein ComEC" "comEC"
In this step we are selecting the samples we want to work with and creating different subset raw phyloseq objects.
incomplete <- c("R24", "R13", "R22", "R25", "R26", "R27", "R5") # List of id patients with incomplete data
# Here we are selecting data collected at week 0 and 7 from both FMT, placebo and Donor subjects except from subjects with incomplete data
ps_raw_subset <- subset_samples(raw_ps, (week == "Week_0" | week == "Week_7") & !(id %in% incomplete))
# Here we are selecting data collected at week 0 and 7 from subjects that received the FMT from the already subset phyloseq object
ps_raw_FMT <- subset_samples(ps_raw_subset, Group=="FMT")
# Here we are selecting data collected at week 0 and 7 from Placebo subjects from the already subset phyloseq object
ps_raw_placebo <- subset_samples(ps_raw_subset, Group=="Placebo")
# Here we are selecting the samples collected at each week
ps_raw_week0 <- subset_samples(ps_raw_subset, (Group == "Placebo" | Group == "FMT") & week == "Week_0")
ps_raw_week7 <- subset_samples(ps_raw_subset, (Group == "Placebo" | Group == "FMT") & week == "Week_7")
The DESeq2 normalization method is a statistical approach used to adjust for systematic biases and differences in sequencing depth when analyzing count data. It involves estimating size factors, which account for differences in library size between samples, and dispersion, which capture the variability in read counts. The method applies a variance-stabilizing transformation to the counts, making them more suitable for downstream statistical analysis. This normalization process allows for valid comparisons between samples and enables the identification of statistically significant differences in expression levels or abundances while controlling for technical variations.
set.seed(0503)
# First we need to convert the raw phyloseq object to a DESeqDataSet object
ps_dds <- phyloseq_to_deseq2(ps_raw_subset, ~Group + week )
# and run DESeq2 standard analysis:
ps_dds <- DESeq(ps_dds)
# geometric mean, set to zero when all coordinates are zero
geo_mean_protected <- function(x) {
if (all(x == 0)) {
return (0)
}
exp(mean(log(x[x != 0])))
}
geoMeans <- apply(counts(ps_dds), 1, geo_mean_protected) # geometric mean
ps_dds <- estimateSizeFactors(ps_dds, geoMeans = geoMeans) # estimate size factors. Size factors account for differences in sequencing depth between samples and are used to normalize the read counts
ps_dds <- estimateDispersions(ps_dds) # Estimate dispersions. Dispersions capture the variation in read counts across samples and are used in downstream statistical analyses.
abund <- getVarianceStabilizedData(ps_dds) # variance-stabilized transformed data. This transformation stabilizes the variance and makes the data more suitable for statistical analysis.
# making our phyloseq object with transformed table
vst_count_phy <- otu_table(abund, taxa_are_rows=T)
sample_info_tab_phy <- sample_data(samples_df)
vst_physeq <- phyloseq(vst_count_phy, sample_info_tab_phy)
Additionally, other subsets with the 50 most abundant KO among all samples and the KOs involved in the Butyrate production pathway are created.
# Get the top50 most abundant KOs out of the filtered phyloseq object
top50 <- names(sort(taxa_sums(ps_raw_subset), decreasing=TRUE)[1:50])
# Create a phyloseq object only with the top50 KOs
ps_top50 = prune_taxa(top50, ps_raw_subset)
This subsets are visually explored in bar plots.
plot <- plot_bar(ps_top50, fill = "KO") +
geom_bar(aes(color=KO, fill=KO), stat="identity", position="fill") +
labs(x="Samples", y="Abundance", title="The 50 most abundant KO") +
facet_wrap(~week, scales="free_x") +
theme(axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
strip.text = element_text(size = 20),
plot.title = element_text(size = 30))
plot
ps_top50_relab = phyloseq::transform_sample_counts(ps_top50, function(x){(x / sum(x))})
plot <- plot_bar(ps_top50_relab, fill = "KO") +
geom_bar(aes(color=KO, fill=KO), stat="identity", position="fill") +
labs(x="Samples", y="Relative abundance between Top50", title="The 50 most abundant KO") +
facet_wrap(~week, scales="free_x") +
theme(axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
strip.text = element_text(size = 20),
plot.title = element_text(size = 30))
plot
Next, we are going to calculate the Alpha Diversity of our samples. Alpha-diversity is often described as within-sample diversity. In this case, this measure will provide information about the functional richness of the sample. Here we are going to measure the alpha diversity with the observed richness and the Shannon index. The observed diversity evaluates how many different functions there are while the Shannon index also considers how evenly they are distributed.
To do so, we first need to calculate the diversity estimates from our phyloseq object, and test if the data are normally distributed using Shapiro-Wilk Normality test. If the data follows a normal distribution, we can run an ANOVA or t-test to find statistically significant difference between sample-types. If the data is not normally distributed, a non-parametric test such as Kruskal-Wallis will be considered.
First, we need to extract the estimates of the alpha diversity and then perform a Shapiro-Wilk Normality test to see if this data follows a normal distribution. The null hypothesis of this test is that “sample distribution is normal”. If the test is significant, the distribution is non-normal.
# Extract Shannon results
alpha_results <- estimate_richness(ps_raw_subset, measures = c("Shannon", "Observed"))
# Shapiro-Wilk Normality test.
# The null hypothesis of this test is that “sample distribution is normal”. If the test is significant, the distribution is non-normal.
shapiro.test(alpha_results$Shannon)
##
## Shapiro-Wilk normality test
##
## data: alpha_results$Shannon
## W = 0.96061, p-value = 0.1289
shapiro.test(alpha_results$Observed)
##
## Shapiro-Wilk normality test
##
## data: alpha_results$Observed
## W = 0.97096, p-value = 0.3136
From the output, the p-value > 0.05 implying that the distribution of the data are not significantly different from normal distribution. In other words, we can assume the normality for both Observed and Shannon alpha diversity estimates
symnum.args = list(cutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1), symbols = c("****", "***", "**", "*", "ns"))
# Plot all functional alpha diversity estimates colored by week
plot_richness(ps_raw_subset, x="Sample_id", color="week", measures=c("Shannon", "Observed")) +
labs(title="Alpha Diversity of each Sample", x="Sample IDs") +
theme(axis.text.x = element_text(size = 12),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 25),
axis.title.y = element_text(size = 25),
legend.title = element_text(size = 25),
legend.text = element_text(size = 20),
strip.text = element_text(size = 25),
plot.title = element_text(size = 30))
a_my_comparisons <- list(c("FMT_Week_0", "FMT_Week_7"), c("Placebo_Week_0", "Placebo_Week_7"), c("FMT_Week_0", "Placebo_Week_0"), c("FMT_Week_7", "Placebo_Week_7"))
plot_richness(ps_raw_subset, x="group_week", color="group_week", nrow = 2, measures=c("Shannon", "Observed")) +
geom_boxplot(aes(fill=group_week),alpha=0.2) +
labs(title="Alpha Diversity") +
stat_compare_means(method = "t.test", comparisons = a_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8) +
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
strip.text = element_text(size = 20),
plot.title = element_text(size = 30))
Given the normal distribution of the data, we can plot the Alpha Diversity of each sample group and test its significance with t-tests.
b_my_comparisons <- list(c("FMT", "Placebo"), c("Donor", "FMT"), c("Placebo", "Donor"))
# Boxplot Shannon functional diversity grouped by Group and separated by Week
plot_richness(ps_raw_subset, x="Group", color="Group", measures=c("Shannon")) +
geom_boxplot(aes(fill=Group),alpha=0.2) +
labs(title="Shannon Index grouped by Subjects'") +
stat_compare_means(method = "t.test", comparisons = b_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8) +
facet_wrap(~week, scales="free_x") +
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
strip.text = element_text(size = 20),
plot.title = element_text(size = 30))
# Boxplot Observed functional diversity grouped by Group and separated by Week
plot_richness(ps_raw_subset, x="Group", color="Group", measures=c("Observed")) +
facet_wrap(~week, scales="free_x") +
geom_boxplot(aes(fill=Group),alpha=0.2) +
labs(title="Observed diversity grouped by Subjects'") +
stat_compare_means(method = "t.test", comparisons = b_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8) +
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
strip.text = element_text(size = 20),
plot.title = element_text(size = 30))
c_my_comparisons <- list( c("Week_0", "Week_7"))
## Plot all Shannon functional alpha diversity grouped by week
plot_richness(ps_raw_subset, color = "week", x = "week", measures = c("Shannon")) +
geom_boxplot(aes(fill = week), alpha=0.2) +
stat_compare_means(method = "t.test", comparisons = c_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8) +
labs(title="Shannon Index grouped by Week") +
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
strip.text = element_text(size = 20),
plot.title = element_text(size = 30))
## Plot FMT functional alpha diversity grouped by week
alpha_results_FMT <- estimate_richness(ps_raw_FMT, measures = c("Shannon", "Observed"))
plot_richness(ps_raw_FMT, color = "week", x = "week", measures = c("Shannon")) +
geom_boxplot(aes(fill = week), alpha=0.2) +
stat_compare_means(method = "t.test", comparisons = c_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8, paired=TRUE) +
labs(title="Shannon Index of FMT subjects grouped by Week") +
geom_line(aes(x = week, y = alpha_results_FMT$Shannon, group = id), color='black', alpha=0.1) +
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
strip.text = element_text(size = 20),
plot.title = element_text(size = 30))
plot_richness(ps_raw_FMT, color = "week", x = "week", measures = c("Observed")) +
geom_boxplot(aes(fill = week), alpha=0.2) +
stat_compare_means(method = "t.test", comparisons = c_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8, paired=TRUE) +
labs(title="Observed Diversity of FMT subjects grouped by Week") +
geom_line(aes(x = week, y = alpha_results_FMT$Observed, group = id), color='black', alpha=0.1) +
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
strip.text = element_text(size = 20),
plot.title = element_text(size = 30))
## Plot Placebo functional alpha diversity grouped by week
alpha_results_placebo <- estimate_richness(ps_raw_placebo, measures = c("Shannon", "Observed"))
plot_richness(ps_raw_placebo, color = "week", x = "week", measures = c("Shannon")) +
geom_boxplot(aes(fill = week), alpha=0.2) +
stat_compare_means(method = "t.test", comparisons = c_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8, paired=TRUE) +
labs(title="Shannon Index of Placebo subjects grouped by Week") +
geom_line(aes(x = week, y = alpha_results_placebo$Shannon, group = id), color='black', alpha=0.1) +
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
strip.text = element_text(size = 20),
plot.title = element_text(size = 30))
plot_richness(ps_raw_placebo, color = "week", x = "week", measures = c("Observed")) +
geom_boxplot(aes(fill = week), alpha=0.2) +
stat_compare_means(method = "t.test", comparisons = c_my_comparisons, label = "p.signif", symnum.args = symnum.args, size=8, paired=TRUE, size=8) +
labs(title="Observed Alpha Diversity of Placebo subjects grouped by Week") +
geom_line(aes(x = week, y = alpha_results_placebo$Observed, group = id), color='black', alpha=0.1) +
theme(axis.text.x = element_text(size = 20),
axis.text.y = element_text(size = 20),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
strip.text = element_text(size = 20),
plot.title = element_text(size = 30))
# Beta Diversity
Beta-Diversity provides a measure of dissimilarity between sample types. The functional differences between samples can be quantified with several methods. For this functional analysis, the Bray-Curtis distance estimator was chosen. This method compares the abundances of different fuction between samples and calculates the dissimilarity based on the proportion of differences.
Between-samples differences are visualized with a principal coordinates analysis.
set.seed(0503)
# generating and visualizing the PCoA with phyloseq
vst_pcoa <- ordinate(vst_physeq, method="PCoA", distance="bray")
eigen_vals <- vst_pcoa$values$Eigenvalues # allows us to scale the axes according to their magnitude of separating apart the samples
plot_ordination(vst_physeq, vst_pcoa, color="Group") +
theme(aspect.ratio=1) +
labs(title = "PCoA (Bray-Curtis index)", subtitle = "DESeq2 Transformation") +
stat_ellipse(aes(group = Group), linetype = 2) +
geom_point(size = 3) +
theme(axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
plot.title = element_text(size = 25),
plot.subtitle = element_text(size = 20))
plot_ordination(vst_physeq, vst_pcoa, color="group_week") +
theme(aspect.ratio=1) +
labs(title = "PCoA (Bray-Curtis index)", subtitle = "DESeq2 Transformation") +
stat_ellipse(aes(group = group_week), linetype = 2) +
geom_point(size = 3) +
theme(axis.text.x = element_text(size = 15),
axis.text.y = element_text(size = 15),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20),
legend.title = element_text(size = 20),
legend.text = element_text(size = 15),
plot.title = element_text(size = 25),
plot.subtitle = element_text(size = 20))
Betadisper and permutational ANOVA
Here we are going to test if there is a statistically signficant difference between our sample types. One way to do this is with the betadisper and adonis functions from the vegan package. adonis can tell us if there is a statistical difference between groups. However, adonis assumes that there is no statistical difference within the same group. To check whether this assumption is met, we first need to perfom a betadisper permutest. If the p-value is non-significant (p-value > 0.05), there is a sufficient level of homogeneity of dispersion within groups. If there is not, then the adonis permanova test can be unreliable.
set.seed(1234)
# Betadisper Permutest
bray_dist = phyloseq::distance(vst_physeq, method="bray", weighted=T)
disp.age = betadisper(bray_dist, sample_data(vst_physeq)$Group)
disp.age$call
## betadisper(d = bray_dist, group = sample_data(vst_physeq)$Group)
permutest(disp.age, pairwise=TRUE, permutations=1000)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 2 2.8810e-06 1.4407e-06 0.318 1000 0.7473
## Residuals 42 1.9028e-04 4.5305e-06
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## Donor FMT Placebo
## Donor 0.53846 0.6953
## FMT 0.51154 0.6044
## Placebo 0.66797 0.60276
The p-value of the betadisper permutest is higher than 0.05 so we can assume that sample groups are sufficiently homogenous and we can carry on with the adonis permanova test.
set.seed(1234)
# Adonis PERMANOVA analysis
adonis2(bray_dist ~ sample_data(vst_physeq)$Group)
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = bray_dist ~ sample_data(vst_physeq)$Group)
## Df SumOfSqs R2 F Pr(>F)
## sample_data(vst_physeq)$Group 2 0.00019555 0.06909 1.5585 0.043 *
## Residual 42 0.00263506 0.93091
## Total 44 0.00283061 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The p-value of the Adonis permanova test is lower than 0.05 so there is a statistically significant difference between groups.
set.seed(1234)
# Betadisper Permutest
bray_dist = phyloseq::distance(vst_physeq, method="bray", weighted=T)
disp.age = betadisper(bray_dist, sample_data(vst_physeq)$group_week)
disp.age$call
## betadisper(d = bray_dist, group = sample_data(vst_physeq)$group_week)
permutest(disp.age, pairwise=TRUE, permutations=1000)
##
## Permutation test for homogeneity of multivariate dispersions
## Permutation: free
## Number of permutations: 1000
##
## Response: Distances
## Df Sum Sq Mean Sq F N.Perm Pr(>F)
## Groups 4 1.3525e-05 3.3811e-06 0.7916 1000 0.5365
## Residuals 40 1.7085e-04 4.2712e-06
##
## Pairwise comparisons:
## (Observed p-value below diagonal, permuted p-value above diagonal)
## Donor_Week_0 FMT_Week_0 FMT_Week_7 Placebo_Week_0 Placebo_Week_7
## Donor_Week_0 0.36064 0.90509 0.99600 0.5754
## FMT_Week_0 0.36512 0.16683 0.17982 0.6004
## FMT_Week_7 0.89944 0.17219 0.85514 0.4655
## Placebo_Week_0 0.99698 0.17573 0.84693 0.4236
## Placebo_Week_7 0.58057 0.60387 0.48129 0.42323
The p-value of the betadisper permutest is higher than 0.05 so we can assume that sample groups are sufficiently homogenous and we can carry on with the adonis permanova test.
# Adonis PERMANOVA pairwise analysis
ps_norm_df <- as(sample_data(vst_physeq), "data.frame")
pairwise.adonis(bray_dist,ps_norm_df$group_week)
## pairs Df SumsOfSqs F.Model R2
## 1 FMT_Week_0 vs Donor_Week_0 1 7.611052e-05 1.0388635 0.06907859
## 2 FMT_Week_0 vs FMT_Week_7 1 7.066218e-05 1.0999184 0.04382159
## 3 FMT_Week_0 vs Placebo_Week_0 1 4.321374e-05 0.6505008 0.03310352
## 4 FMT_Week_0 vs Placebo_Week_7 1 5.172707e-05 0.7240113 0.03670710
## 5 Donor_Week_0 vs FMT_Week_7 1 8.807225e-05 1.5480230 0.09956398
## 6 Donor_Week_0 vs Placebo_Week_0 1 8.307470e-05 1.4465361 0.13847040
## 7 Donor_Week_0 vs Placebo_Week_7 1 8.563159e-05 1.2590216 0.12272336
## 8 FMT_Week_7 vs Placebo_Week_0 1 8.543820e-05 1.5714370 0.07638927
## 9 FMT_Week_7 vs Placebo_Week_7 1 1.357311e-04 2.2856860 0.10738137
## 10 Placebo_Week_0 vs Placebo_Week_7 1 3.374144e-05 0.5566305 0.03823896
## p.value p.adjusted sig
## 1 0.402 1.00
## 2 0.321 1.00
## 3 0.838 1.00
## 4 0.724 1.00
## 5 0.067 0.67
## 6 0.127 1.00
## 7 0.212 1.00
## 8 0.091 0.91
## 9 0.014 0.14
## 10 0.878 1.00
In the Pairwise adonis test, only comparisons between FMT_Week7 and Placebo_Week7 showed a significant p-value.
Find features that are up or down represented in the compared groups
using the microbial package that implements DESeq2. - The difftest
function from the microbial package finds features that are up or down
represented in the compared groups. The difftest function requires a
phyloseq object containing merged information of abundance, taxonomic
assignment and sample data including the measured variables and
categorical information of the samples. Since the DESeq2 transformation
is performed in this function, raw count values are preferred. In this
context, we will be using the ps_raw phyloseq object and its
subsets.
- The plotdiff function produces a figure with the top most annotated
features with corresponding adjusted p-values and abundance distribution
with respect to control conditions.
The group parameter is a character string specifying the name of a categorical variable containing grouping information. The pvalue and log2FC are thresholds for p values and log2 fold change. Adjusted p value cutoff is also supported by specify the padj paramater.
res1 <- difftest(ps_raw_FMT, group= "week", ref = "Week_0")
res1$Phylum <- res1$KO
write.table(res1,"results_deseq2_Week7vsWeek0_FMT.tsv", sep = "\t", quote = F, row.names = FALSE)
# DESCRIPTION
plotdiff(res1, level="Description", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
labs(y = "\nLog2 Fold-Change for Week_7 vs Week_0 in FMT subjects \n pvalue >= 0.05, log2FC >= 4", x = "") +
theme(axis.text.x = element_text(color = "black", size = 10),
axis.text.y = element_text(color = "black", size = 10),
axis.title.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
legend.position = "none") +
geom_hline(yintercept = 0, linetype="dotted")
# SYMBOL
plotdiff(res1, level="Symbol", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
labs(y = "\nLog2 Fold-Change for Week_7 vs Week_0 in FMT subjects \n pvalue >= 0.05 , log2FC >= 4", x = "") +
theme(axis.text.x = element_text(color = "black", size = 10),
axis.text.y = element_text(color = "black", size = 10),
axis.title.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
legend.position = "none") +
geom_hline(yintercept = 0, linetype="dotted")
res2 <- difftest(ps_raw_placebo, group= "week", ref = "Week_0")
res2$Phylum <- res2$KO
write.table(res2,"results_deseq2_Week7vsWeek0_placebo.tsv", sep = "\t", quote = F, row.names = FALSE)
# DESCRIPTION
plotdiff(res2, level="Description", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
labs(y = "\nLog2 Fold-Change for Week_7 vs Week_0 in Placebo subjects \n pvalue >= 0.05 , log2FC >= 4", x = "") +
theme(axis.text.x = element_text(color = "black", size = 10),
axis.text.y = element_text(color = "black", size = 10),
axis.title.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
legend.position = "none") +
geom_hline(yintercept = 0, linetype="dotted")
# SYMBOL
plotdiff(res2, level="Symbol", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
labs(y = "\nLog2 Fold-Change for Week_7 vs Week_0 in Placebo subjects \n pvalue >= 0.05 , log2FC >= 4", x = "") +
theme(axis.text.x = element_text(color = "black", size = 10),
axis.text.y = element_text(color = "black", size = 10),
axis.title.y = element_text(size = 12),
axis.title.x = element_text(size = 12),
legend.position = "none") +
geom_hline(yintercept = 0, linetype="dotted")
res3 <- difftest(ps_raw_week0, group= "Group", ref = "Placebo")
res3$Phylum <- res3$KO
write.table(res3,"results_deseq2_FMTvsPlacebo_week0.tsv", sep = "\t", quote = F, row.names = FALSE)
# DESCRIPTION
plotdiff(res3, level="Description", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
labs(y = "\nLog2 Fold-Change for FMT vs Placebo subjects in samples collected at Week 0 \n pvalue = 0.05 , log2FC >= 4", x = "") +
theme(axis.text.x = element_text(color = "black", size = 12),
axis.text.y = element_text(color = "black", size = 12),
axis.title.y = element_text(size = 14),
axis.title.x = element_text(size = 14),
legend.position = "none") +
geom_hline(yintercept = 0, linetype="dotted")
# SYMBOL
plotdiff(res3, level="Symbol", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
labs(y = "\nLog2 Fold-Change for FMT vs Placebo subjects in samples collected at Week 0 \n pvalue = 0.05 , log2FC >= 4", x = "") +
theme(axis.text.x = element_text(color = "black", size = 12),
axis.text.y = element_text(color = "black", size = 12),
axis.title.y = element_text(size = 14),
axis.title.x = element_text(size = 14),
legend.position = "none") +
geom_hline(yintercept = 0, linetype="dotted")
res4 <- difftest(ps_raw_week7, group= "Group", ref = "Placebo")
res4$Phylum <- res4$KO
write.table(res4,"results_deseq2_FMTvsPlacebo_week7.tsv", sep = "\t", quote = F, row.names = FALSE)
# DESCRIPTION
plotdiff(res4, level="Description", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
labs(y = "\nLog2 Fold-Change for FMT vs Placebo subjects in samples collected at Week 7 \n pvalue = 0.05 , log2FC >= 4", x = "") +
theme(axis.text.x = element_text(color = "black", size = 12),
axis.text.y = element_text(color = "black", size = 12),
axis.title.y = element_text(size = 14),
axis.title.x = element_text(size = 14),
legend.position = "none") +
geom_hline(yintercept = 0, linetype="dotted")
# SYMBOL
plotdiff(res4, level="Symbol", pvalue=0.05, log2FC = 4) + geom_hline(yintercept = 0, linetype="dotted")+ geom_point(size = 4) +
labs(y = "\nLog2 Fold-Change for FMT vs Placebo subjects in samples collected at Week 7 \n pvalue = 0.05 , log2FC >= 4", x = "") +
theme(axis.text.x = element_text(color = "black", size = 12),
axis.text.y = element_text(color = "black", size = 12),
axis.title.y = element_text(size = 14),
axis.title.x = element_text(size = 14),
legend.position = "none") +
geom_hline(yintercept = 0, linetype="dotted")
Venn diagrams to perform further comparisons are designed.
## FMT
#results_deseq2_Week7vsWeek0_FMT_des <- read.delim("~/Desktop/RyC_TFM/final_results/results_deseq2_Week7vsWeek0_FMT_des.tsv", row.names=1)
results_deseq2_Week7vsWeek0_FMT_des <- read.delim("~/Desktop/RyC_TFM/final_results/results_deseq2_Week7vsWeek0_FMT.tsv")
results_deseq2_Week7vsWeek0_FMT_des_SIGNI <- results_deseq2_Week7vsWeek0_FMT_des %>% filter (results_deseq2_Week7vsWeek0_FMT_des$pvalue <= 0.05)
## PLACEBO
results_deseq2_Week7vsWeek0_Placebo_des <- read.delim("~/Desktop/RyC_TFM/final_results/results_deseq2_Week7vsWeek0_placebo.tsv")
results_deseq2_Week7vsWeek0_Placebo_des_SIGNI <- results_deseq2_Week7vsWeek0_Placebo_des %>% filter (results_deseq2_Week7vsWeek0_Placebo_des$pvalue <= 0.05)
## To create both lists
A = results_deseq2_Week7vsWeek0_FMT_des_SIGNI %>% dplyr::select(KO) %>% unlist()
B = results_deseq2_Week7vsWeek0_Placebo_des_SIGNI %>% dplyr::select(KO) %>% unlist()
## Venn Diagrams
vennPlot <- venn.diagram(list( A = A, B = B),fill = c("forestgreen", "darkorchid4"), na = "remove", category.names = c("Week 7 vs Week 0 in FMT subjects", "Week 7 vs Week 0 in Placebo subjects"), alpha = c(0.4, 0.4), cex = 1, cat.fontface = 4,lty =2, fontfamily =3, filename = NULL, cat.dist = c(0.03, 0.03),
cat.pos = c(1, 4 )) ; grid.newpage(); grid.draw(vennPlot)
## W0
results_deseq2_FMTvsPlacebo_week0_des <- read.delim("~/Desktop/RyC_TFM/final_results/results_deseq2_FMTvsPlacebo_week0.tsv")
results_deseq2_FMTvsPlacebo_week0_des_SIGNI <- results_deseq2_FMTvsPlacebo_week0_des %>% filter(results_deseq2_FMTvsPlacebo_week0_des$pvalue <= 0.05)
## W7
results_deseq2_FMTvsPlacebo_week7_des <- read.delim("~/Desktop/RyC_TFM/final_results/results_deseq2_FMTvsPlacebo_week7.tsv")
results_deseq2_FMTvsPlacebo_week7_des_SIGNI <- results_deseq2_FMTvsPlacebo_week7_des %>% filter (results_deseq2_FMTvsPlacebo_week7_des$pvalue <= 0.05)
## To create both lists
A = results_deseq2_FMTvsPlacebo_week0_des_SIGNI %>% dplyr::select(KO) %>% unlist()
B = results_deseq2_FMTvsPlacebo_week7_des_SIGNI %>% dplyr::select(KO) %>% unlist()
## Venn Diagrams
vennPlot <- venn.diagram(list( A = A, B = B),fill = c("brown2", "darkgoldenrod2"), na = "remove", category.names = c("FMT vs Placebo in Week 0" , "FMT vs Placebo in Week 7"), alpha = c(0.4, 0.4), cex = 1, cat.fontface = 4,lty =2, fontfamily =3, filename = NULL, cat.dist = c(0.02, 0.02), cat.pos = c(-5, -3)) ; grid.newpage(); grid.draw(vennPlot)